Prepare libraries

suppressMessages(
  c(library(scater),
    library(Seurat),
    library(tidyverse),
    library(cowplot),
    library(Matrix.utils),
    library(edgeR),
    library(dplyr),
    library(magrittr),
    library(Matrix),
    library(purrr),
    library(reshape2),
    library(S4Vectors),
    library(tibble),
    library(SingleCellExperiment),
    library(pheatmap),
    library(apeglm),
    library(png),
    library(DESeq2),
    library(RColorBrewer),
    library(mixOmics),
    library(knitr),
    library(mosaic),
    library(gplots),
    library(clusterProfiler),
    library(gridExtra),
    library(GSVA),
    library(RANN))
)
##    [1] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##    [6] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##   [11] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##   [16] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##   [21] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##   [26] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##   [31] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##   [36] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##   [41] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##   [46] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##   [51] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##   [56] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##   [61] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##   [66] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##   [71] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##   [76] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##   [81] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##   [86] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##   [91] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##   [96] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [101] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [106] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [111] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [116] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [121] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [126] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [131] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [136] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [141] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [146] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [151] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [156] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [161] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [166] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [171] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [176] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [181] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [186] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [191] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [196] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [201] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [206] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [211] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [216] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [221] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [226] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [231] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [236] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [241] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [246] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [251] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [256] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [261] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [266] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [271] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [276] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [281] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [286] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [291] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [296] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [301] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [306] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [311] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [316] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [321] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [326] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [331] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [336] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [341] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [346] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [351] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [356] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [361] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [366] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [371] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [376] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [381] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [386] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [391] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [396] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [401] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [406] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [411] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [416] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [421] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [426] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [431] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [436] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [441] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [446] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [451] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [456] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [461] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [466] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [471] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [476] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [481] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [486] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [491] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [496] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [501] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [506] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [511] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [516] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [521] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [526] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [531] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [536] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [541] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [546] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [551] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [556] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [561] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [566] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [571] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [576] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [581] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [586] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [591] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [596] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [601] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [606] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [611] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [616] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [621] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [626] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [631] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [636] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [641] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [646] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [651] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [656] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [661] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [666] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [671] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [676] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [681] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [686] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [691] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [696] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [701] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [706] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [711] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [716] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [721] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [726] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [731] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [736] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [741] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [746] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [751] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [756] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [761] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [766] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [771] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [776] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [781] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [786] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [791] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [796] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [801] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [806] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [811] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [816] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [821] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [826] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [831] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [836] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [841] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [846] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [851] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [856] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [861] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [866] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [871] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [876] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [881] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [886] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [891] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [896] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [901] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [906] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [911] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [916] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [921] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [926] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [931] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [936] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [941] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [946] "mixOmics"             "lattice"              "MASS"                 "RColorBrewer"         "DESeq2"              
##  [951] "png"                  "apeglm"               "pheatmap"             "reshape2"             "magrittr"            
##  [956] "edgeR"                "limma"                "Matrix.utils"         "Matrix"               "cowplot"             
##  [961] "forcats"              "stringr"              "dplyr"                "purrr"                "readr"               
##  [966] "tidyr"                "tibble"               "tidyverse"            "Seurat"               "scater"              
##  [971] "ggplot2"              "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"         "matrixStats"         
##  [976] "Biobase"              "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [981] "BiocGenerics"         "parallel"             "stats4"               "stats"                "graphics"            
##  [986] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [991] "RANN"                 "GSVA"                 "gridExtra"            "clusterProfiler"      "gplots"              
##  [996] "mosaic"               "mosaicData"           "ggformula"            "ggstance"             "knitr"               
##  [ reached getOption("max.print") -- omitted 485 entries ]

Extract gene counts from Seurat object

# Bring in Seurat object
seurat <- readRDS('Results/seurat_labelled.rds')

# Extract raw counts and metadata to create SingleCellExperiment object
counts <- seurat@assays$RNA@counts 

metadata <- seurat@meta.data

# Set up metadata as desired for aggregation and DE analysis
metadata$cluster_id <- factor(seurat@active.ident)

# Create single cell experiment object
sce <- SingleCellExperiment(assays = list(counts = counts), 
                           colData = metadata)

pdf('Figure/cell_cluster.pdf',
    width = 10,
    height = 7)
DimPlot(seurat,
        reduction = "umap",
        label = TRUE,
        label.size = 4,
        repel = TRUE)
dev.off()
## RStudioGD 
##         2
pdf('Figure/cell_cluster_by_condition.pdf',
    width = 12,
    height = 7)
DimPlot(seurat,
        reduction = "umap",
        label = TRUE,
        label.size = 4,
        repel = TRUE,
        split.by = "condition") + NoLegend()
dev.off()
## RStudioGD 
##         2

Check number of cells from each sample in each cluster

# Checj the number of cells from each sample in each cluster
options(width = 100)
table(sce$cluster_id, sce$sample)
##                         
##                          Homeostasis-M1 Homeostasis-M2 Homeostasis-M3 HPDay4-M1 HPDay4-M2 HPDay4-M3
##   CD4                               105            153            127        19        56        13
##   CD8                               106             64            407      1572       750      1522
##   Plasma cell                        11             15              1       139       223       159
##   DC                                 59            103             69       151       169       185
##   Inflammatory Monocytes              8              5             19       101       115        75
##   Macrophage (M2)                   105            130             76        23        25        35
##   NK                                 17              9             27        18        20        21
##   Neutrophil                          6              2              7        41        46        84
##   Epithelial                          6              5             29         6         8        11
##   Monocyte (cycling)                 35             46             18        27        84        37
##   B cell                            998           1270           2485         0         1         2
##   pDC                                 8             11             16         0         0         0
cell_distribution <- as.data.frame.matrix(table(sce$cluster_id, sce$sample))
total_num <- colSums(cell_distribution)

df <- melt(as.matrix(cell_distribution), value.name = "cell_num")
df$Var1 <- factor(df$Var1)
colnames(df)[1:2] <- c("Cell_Type", "Sample")

ggplot(df, aes(x =  Sample, y = cell_num, fill = Cell_Type)) + 
  geom_bar(position = "fill", stat="identity") + ggtitle("Cell Number Distribution")

pdf('Figure/cell_distribution.pdf',
    width = 10,
    height = 7)
ggplot(df, aes(x =  Sample, y = cell_num, fill = Cell_Type)) + 
  geom_bar(position = "fill", stat="identity") + ggtitle("Cell Number Distribution")
dev.off()
## RStudioGD 
##         2

I actually cannot run DESeq on this dataset because the gene expression counts are already normalized so therefore not integers. I cannot load them into DESeq2.

Examine mTOR pathway leading edge genes

features <- c("Raf1", "Grk2", "Akt1", "Cdk1", "Csnk2b", "Mknk1", "Myd88", "Nfkbib", "Arhgdia", "Rac1", "Ripk1", "Stat2", "Tnfrsf1a", "Vav2", "Ywhaz", "Pdk1", "Trib3", "Dapp1", "Map2k3", "Mapk8", "Tbk1", "Ikbke", "Gsk3a", "Actr3", "Ap2m1", "Ptpn6")

DotPlot(seurat, features = features, cols = "RdBu") + RotatedAxis() + coord_flip()
## Warning in FetchData(object = object, vars = features): The following requested variables were not
## found: Grk2

pdf('Figure/mTOR_leadingedge_Dotplot.pdf',
    width = 6,
    height = 8)
DotPlot(seurat, features = features, cols = "RdBu") + RotatedAxis() + coord_flip()
## Warning in FetchData(object = object, vars = features): The following requested variables were not
## found: Grk2
dev.off()
## RStudioGD 
##         2
VlnPlot(object = seurat, 
        features = features,
        pt.size = 0.1,
        ncol = 4)
## Warning in FetchData(object = object, vars = features, slot = slot): The following requested
## variables were not found: Grk2

GSVA analysis

The GSAV analysis requires input of a expression matrix object and a list of gene sets.

The genesets here are Broad MsigDB genesets converted to mouse Entrez ID. These genesets are downloaded from here.

GSVA is only done on the Hallmark geneset.

DefaultAssay(seurat) <- "RNA"
# Prepare the normalized count matrix
# This count matrix is log transformed so we will need to use "Gaussian" distribution for GSVA
counts <- as.matrix(seurat@assays$RNA@data)

# convert gene identifiers to Entrez ID
suppressMessages(suppressWarnings(id_list <- read_csv("Data/mouse_symbol_Ensembl.csv")))

counts <- rownames_to_column(as.data.frame(counts), var = "gene_name")
counts <- inner_join(counts, id_list[,c(2:3)], by = c("gene_name" = "Marker Symbol"))
counts$gene_name <- NULL
non_duplicate_id <- which(duplicated(counts$"EntrezGene ID") == FALSE)
counts <- counts[non_duplicate_id,]
rownames(counts) <- counts$"EntrezGene ID"
counts$"EntrezGene ID" <- NULL
counts <- as.matrix(counts)

saveRDS(counts, "Data/counts.rds") 

# Prepare the genesets
#load("Data/GeneSet/mouse_H_v5p2.rdata")
#load("Data/GeneSet/mouse_c2_v5p2.rdata")
#load("Data/GeneSet/mouse_c6_v5p2.rdata")

# filter the C2 genesets to only canonical pathways from BIOCARTA, KEGG, PID, and REACTOME
#Mm.c2.canonical <- Mm.c2[c(grep("^KEGG", names(Mm.c2)),
#+ grep("^REACTOME", names(Mm.c2)),
#+ grep("^BIOCARTA", names(Mm.c2)),
#+ grep("^PID", names(Mm.c2)))]

#save(Mm.c2.canonical, file = "Data/GeneSet/mouse_c2_canonical.rdata")

#load("Data/GeneSet/mouse_c2_canonical.rdata")

# concocanate a master list genesets that I want to test for
#Mm.gsva.master <- c(Mm.H, Mm.c2.canonical, Mm.c6)
#save(Mm.gsva.master, file = "Data/GeneSet/mouse_gsva_master.rdata")

load("Data/GeneSet/mouse_H_v5p2.rdata")

Now let's run GSVA. A Gaussian kernal was used for this since the input counts were natural log normalized from Seurat.

The ECDF estimatation based on the Gaussuan kernal takes forever to run on a local machine. So the actual GSVA is run on the O2 cluster.

Here is the R code used on O2.

# This part is not run in the markdown
library(GSVA)
load("mouse_H_v5p2.rdata")
counts <- readRDS("counts.rds")

gsva_out <- gsva(counts, Mm.H, min.sz=5, max.sz=500,
                 kcdf="Gaussian", mx.diff=TRUE, verbose=TRUE, parallel.sz=5)

saveRDS(gsva_out, "gsva_out.rds")

Here is the script for submitting the O2 job. I used 5 cores for parallel computing.

#!/bin/bash
#SBATCH -p priority
#SBATCH -t 0-12:00
#SBATCH --mem 100G
#SBATCH -c 5
#SBATCH -e out/ca.%j.err
#SBATCH -o out/ca.%j.out

module load gcc/6.2.0 R/3.6.1
Rscript GSVA.R

Here we have the output from O2 cluster and load it here and merge it into metadata for later analysis.

gsva_res <- t(readRDS("Results/gsva_out.rds"))

seurat@meta.data <- cbind(seurat@meta.data, gsva_res)

write_rds(seurat,
          path = "Results/seurat_gsva.rds") 

Just some exploratory analysis using some Hallmark gene sets.

TCT-specific pathways

FeaturePlot(object = seurat, 
            features = c("HALLMARK_MTORC1_SIGNALING", "HALLMARK_PI3K_AKT_MTOR_SIGNALING", "HALLMARK_MYC_TARGETS_V1", "HALLMARK_MYC_TARGETS_V2", "HALLMARK_KRAS_SIGNALING_DN"),
            order = TRUE,
            min.cutoff = 'q10', 
            label = FALSE,
            repel = TRUE,
            ncol = 3)

pdf('Figure/GSVA_TCT-specific.pdf',
    width = 20,
    height = 10)
FeaturePlot(object = seurat, 
            features = c("HALLMARK_MTORC1_SIGNALING", "HALLMARK_PI3K_AKT_MTOR_SIGNALING", "HALLMARK_MYC_TARGETS_V1", "HALLMARK_MYC_TARGETS_V2", "HALLMARK_KRAS_SIGNALING_DN"),
            order = TRUE,
            min.cutoff = 'q10', 
            label = FALSE,
            repel = TRUE,
            ncol = 3)
dev.off()
## RStudioGD 
##         2
# creat a new annotation column for inflammatory monocytes and all other cells
seurat@meta.data$inf.monocyte <- "other cells"
seurat@meta.data$inf.monocyte[grep("Inflammatory Monocytes", seurat@meta.data$orig.annotation)] <- "Inflammatory Monocytes"
seurat@meta.data$inf.monocyte[grep("Neutrophil", seurat@meta.data$orig.annotation)] <- "Neutrophil"

Idents(object = seurat) <- "inf.monocyte"

# violin plot
VlnPlot(object = seurat, 
        features = c("HALLMARK_MTORC1_SIGNALING"),
        pt.size = 0) + geom_boxplot(width = 0.1)

pdf('Figure/GSVA_mTORC1_violin.pdf',
    width = 10,
    height = 7)
VlnPlot(object = seurat, 
        features = c("HALLMARK_MTORC1_SIGNALING"),
        pt.size = 0) + geom_boxplot(width = 0.1)
dev.off()
## RStudioGD 
##         2
# Stats
NES <-  seurat@meta.data$HALLMARK_MTORC1_SIGNALING

NES_inf_mono <- NES[grep("Inflammatory Monocytes", seurat@meta.data$inf.monocyte)]
NES_neutrophil <- NES[grep("Neutrophil", seurat@meta.data$inf.monocyte)]
NES_other <- NES[grep("other cells", seurat@meta.data$inf.monocyte)]
## check if NES is normally distributed
qqnorm(NES); qqline(NES)

## data is relatively normal, but I am using a non-parametric method here just to be safe
stats::wilcox.test(NES_inf_mono, NES_other, alternative = c("greater"), paired = FALSE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  NES_inf_mono and NES_other
## W = 3159338, p-value < 2.2e-16
## alternative hypothesis: true location shift is greater than 0
stats::wilcox.test(NES_neutrophil, NES_other, alternative = c("greater"), paired = FALSE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  NES_neutrophil and NES_other
## W = 915910, p-value = 0.9999
## alternative hypothesis: true location shift is greater than 0
# violin plot
VlnPlot(object = seurat, 
        features = c("HALLMARK_PI3K_AKT_MTOR_SIGNALING"),
        pt.size = 0) + geom_boxplot(width = 0.1)

pdf('Figure/GSVA_PI3K_mTOR_violin.pdf',
    width = 10,
    height = 7)
VlnPlot(object = seurat, 
        features = c("HALLMARK_PI3K_AKT_MTOR_SIGNALING"),
        pt.size = 0) + geom_boxplot(width = 0.1)
dev.off()
## RStudioGD 
##         2
# Stats
NES <-  seurat@meta.data$HALLMARK_PI3K_AKT_MTOR_SIGNALING
NES_inf_mono <- NES[grep("Inflammatory Monocytes", seurat@meta.data$inf.monocyte)]
NES_neutrophil <- NES[grep("Neutrophil", seurat@meta.data$inf.monocyte)]
NES_other <- NES[grep("other cells", seurat@meta.data$inf.monocyte)]
## check if NES is normally distributed
qqnorm(NES); qqline(NES)

## data is relatively normal, but I am using a non-parametric method here just to be safe
stats::wilcox.test(NES_inf_mono, NES_other, alternative = c("greater"), paired = FALSE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  NES_inf_mono and NES_other
## W = 2910337, p-value < 2.2e-16
## alternative hypothesis: true location shift is greater than 0
stats::wilcox.test(NES_neutrophil, NES_other, alternative = c("greater"), paired = FALSE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  NES_neutrophil and NES_other
## W = 1494134, p-value < 2.2e-16
## alternative hypothesis: true location shift is greater than 0

Opposingly regulated pathways in MK2i treatment

FeaturePlot(object = seurat, 
            features = c("HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION", "HALLMARK_APICAL_JUNCTION", "HALLMARK_PROTEIN_SECRETION", "HALLMARK_ANDROGEN_RESPONSE", "HALLMARK_P53_PATHWAY"),
            order = TRUE,
            min.cutoff = 'q10', 
            label = FALSE,
            repel = TRUE,
            ncol = 3)

Scoring cells using signature gene sets

I will be using the method described in the Biton et al., 2018 paper.

"To score a specific set of n genes in a given cell, a ‘background’ gene set was defined to control for differences in sequencing coverage and library complexity between cells (Kowalczyk et al., 2015). The background gene set was selected to be similar to the genes of interest in terms of expression level. Specifically, the 10n nearest gene neighbors in the 2-D space defined by mean expression and detection frequency across all cells were selected. The signature score for that cell was then defined as the mean expression of the n signature genes in that cell, minus the mean expression of the 10n background genes in that cell."

First I need to acquire gene level metadata to generate the 2-D space for each gene. The 2 dimensions needed here are mean expression and detection frequency.

counts <- seurat@assays$RNA@counts 
counts_df <- as.data.frame(counts)
gene_metadata <- as.data.frame(cbind(rownames(counts_df), rowMeans(counts_df), rowSums(!counts_df == 0)/dim(counts_df)[2]))
gene_metadata$V1 <- NULL
colnames(gene_metadata) <- c("mean_expression", "detection_freq")

# index the gene list of interest to get their coord on the 2-D space
gene_index <- match(features, rownames(gene_metadata))

# remove NA
gene_index <- gene_index[!is.na(gene_index)]

feature_df <- gene_metadata[gene_index,]
other_df <- gene_metadata[-gene_index,]

# find 10n nearest neighbor of these features in the background list
bg.gene <- nn2(other_df, feature_df, k = 10)

bg.gene.index <- unique(as.vector(bg.gene$nn.idx))

bg.gene.df <- other_df[bg.gene.index,]

# now we have the gene signature of interest and the background gene list
# signature score for each cell is the mean expression of the signature genes minus the mean expression of the background genes
bg.index <- match(rownames(bg.gene.df), rownames(counts_df))

sig.df <- counts_df[gene_index, ]
bg.df <- counts_df[bg.index,]

sig.score <- colMeans(sig.df) - colMeans(bg.df)

seurat@meta.data$"mTOR_leading_edge_sig_score" <- sig.score

write_rds(seurat,
          path = "Results/seurat_gsva.rds") 

FeaturePlot(object = seurat, 
            features = c("mTOR_leading_edge_sig_score"),
            order = TRUE,
            min.cutoff = 'q10', 
            label = FALSE,
            repel = TRUE)

pdf('Figure/mTOR_leading_edge_signature.pdf',
    width = 7,
    height = 5)
FeaturePlot(object = seurat, 
            features = c("mTOR_leading_edge_sig_score"),
            order = TRUE,
            min.cutoff = 'q10', 
            label = FALSE,
            repel = TRUE)
dev.off()
## RStudioGD 
##         2
pdf('Figure/mTOR_leading_edge_signature_Voilin.pdf',
    width = 10,
    height = 7)
# violin plot
VlnPlot(object = seurat, 
        features = c("mTOR_leading_edge_sig_score"),
        pt.size = 0) + geom_boxplot(width = 0.1)
dev.off()
## RStudioGD 
##         2
# Stats
sig.score_inf_mono <- sig.score[grep("Inflammatory Monocytes", seurat@meta.data$inf.monocyte)]

sig.score_neutrophil <- sig.score[grep("Neutrophil", seurat@meta.data$inf.monocyte)]
sig.score_other <- sig.score[grep("other cells", seurat@meta.data$inf.monocyte)]

## check if sig.score is normally distributed
qqnorm(sig.score); qqline(sig.score)

## data is relatively normal, but I am using a non-parametric method here just to be safe
stats::wilcox.test(sig.score_inf_mono, sig.score_other, alternative = c("greater"), paired = FALSE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  sig.score_inf_mono and sig.score_other
## W = 3289766, p-value < 2.2e-16
## alternative hypothesis: true location shift is greater than 0
stats::wilcox.test(sig.score_neutrophil, sig.score_other, alternative = c("greater"), paired = FALSE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  sig.score_neutrophil and sig.score_other
## W = 1959241, p-value < 2.2e-16
## alternative hypothesis: true location shift is greater than 0